PadFusion
通过指定的填充模式和大小对输入张量进行填充,支持常数填充、反射填充和对称填充三种模式。
- 输入:
input - 输入张量地址。
input_shape - 输入张量形状数组(不超过4维,不足时从后往前补0)。
output_shape - 输出张量形状数组(不超过4维,不足时从后往前补0)。
paddings - 填充大小数组(长度为 2 × 输入张量维数,不足时从后往前补0)。
padding_mode - 填充模式:Constant=0, Reflect=1, Symmetric=2。
constant_value - 常量填充值的地址(传入的是指向填充值的指针,而非值本身;仅Constant模式使用)。
core_mask(int, 可选) - 核掩码(仅适用于共享存储版本)。
- 输出:
output - 输出数据地址。
formated_input_shape - 格式化后的输入形状数组地址。
formated_output_shape - 格式化后的输出形状数组地址。
formated_paddings - 格式化后的填充数组地址。
in_strides - 输入张量步长数组地址。
out_strides - 输出张量步长数组地址。
- 支持平台:
FT78NE、MT7004
备注
FT78NE 支持的数据类型:int8, int16, int32, fp32, fp64, cplx64, cplx128
MT7004 支持的数据类型:fp16, fp32, int16, int32, cplx64
- 填充模式说明:
kConstant = 0 - 常数填充,使用指定的 constant_value 进行填充。
kReflect = 1 - 反射填充,使用张量边缘的值(不包含边界值)填充输入张量。例如,向 [1, 2, 3, 4] 的两边分别填充2个元素,结果为 [3, 2, 1, 2, 3, 4, 3, 2]。
kSymmetric = 2 - 对称填充,使用张量边缘的值(包含边界值)填充输入张量。例如,向 [1, 2, 3, 4] 的两边分别填充2个元素,结果为 [2, 1, 1, 2, 3, 4, 4, 3]。
共享存储版本:
-
void i8_padfusion_s(long long *params, int core_mask)
-
void i16_padfusion_s(long long *params, int core_mask)
-
void i32_padfusion_s(long long *params, int core_mask)
-
void fp_padfusion_s(long long *params, int core_mask)
-
void dp_padfusion_s(long long *params, int core_mask)
-
void c64_padfusion_s(long long *params, int core_mask)
-
void c128_padfusion_s(long long *params, int core_mask)
-
void hp_padfusion_s(long long *params, int core_mask)
参数数组结构:
1long long params[12];
2params[0] = (long long)input; // 输入数据地址
3params[1] = (long long)output; // 输出数据地址
4params[2] = (long long)input_shape; // 输入形状数组
5params[3] = (long long)output_shape; // 输出形状数组
6params[4] = (long long)paddings; // 填充数组
7params[5] = (long long)padding_mode; // 填充模式
8params[6] = (long long)constant_value; // 常数填充值的*地址*
9params[7] = (long long)formated_input_shape; // 格式化输入形状
10params[8] = (long long)formated_output_shape; // 格式化输出形状
11params[9] = (long long)formated_paddings; // 格式化填充数组
12params[10] = (long long)in_strides; // 输入步长数组
13params[11] = (long long)out_strides; // 输出步长数组
C调用示例:
1// FT78NE 多核示例 2#include <stdio.h> 3#include <padfusion.h> 4 5int main(void) { 6 srand(time(0)); 7 8 // 输入参数设置 9 int input_shape[4] = {23, 31, 29, 28}; 10 int paddings[8] = {3, 2, 3, 2, 1, 2, 0, 2}; 11 int padding_mode = 2; // kSymmetric 12 double constant_value[2] = {1.0, 0.0}; 13 int core_mask = 0xff; 14 15 // 内存分配(DDR空间) 16 double* input = (double*)0x81000000; 17 double* output = (double*)0x82000000; 18 int* formated_input_shape = (int*)0x84000000; 19 int* formated_output_shape = (int*)0x85000000; 20 int* formated_paddings = (int*)0x86000000; 21 int* in_strides = (int*)0x87000000; 22 int* out_strides = (int*)0x88000000; 23 int* output_shape = (int*)0x89000000; 24 25 // 计算输出形状 26 for (int i = 0; i < 4; ++i) { 27 output_shape[i] = input_shape[i] + paddings[i * 2] + paddings[i * 2 + 1]; 28 } 29 30 // 初始化input 31 // ... 省略初始化input数据代码 ... 32 33 // 准备参数数组 34 long long params[12]; 35 params[0] = (long long)input; 36 params[1] = (long long)output; 37 params[2] = (long long)input_shape; 38 params[3] = (long long)output_shape; 39 params[4] = (long long)paddings; 40 params[5] = (long long)padding_mode; 41 params[6] = (long long)constant_value; 42 params[7] = (long long)formated_input_shape; 43 params[8] = (long long)formated_output_shape; 44 params[9] = (long long)formated_paddings; 45 params[10] = (long long)in_strides; 46 params[11] = (long long)out_strides; 47 48 // 执行 PadFusion 操作 49 c128_padfusion_s(params, core_mask); 50 return 0; 51}
私有存储版本:
-
void i8_padfusion_p(long long *params)
-
void i16_padfusion_p(long long *params)
-
void i32_padfusion_p(long long *params)
-
void fp_padfusion_p(long long *params)
-
void dp_padfusion_p(long long *params)
-
void c64_padfusion_p(long long *params)
-
void c128_padfusion_p(long long *params)
-
void hp_padfusion_p(long long *params)
C调用示例:
1// MT7004 单核示例 2#include <stdio.h> 3#include <padfusion.h> 4 5 6int main(void) { 7 // 输入参数设置 8 int input_shape[4] = {4, 8, 4, 8}; 9 int paddings[8] = {1, 1, 1, 1, 1, 1, 1, 1}; 10 int padding_mode = 0; // kConstant 11 float constant_value = 0.0f; 12 13 // 内存分配(L2空间) 14 float* input = (float*)0x10000000; 15 float* output = (float*)0x10100000; 16 int* output_shape = (int*)0x10200000; 17 int* formated_input_shape = (int*)0x10300000; 18 int* formated_output_shape = (int*)0x10400000; 19 int* formated_paddings = (int*)0x10500000; 20 int* in_strides = (int*)0x10600000; 21 int* out_strides = (int*)0x10700000; 22 23 // 计算输出形状 24 for (int i = 0; i < 4; ++i) { 25 output_shape[i] = input_shape[i] + paddings[i * 2] + paddings[i * 2 + 1]; 26 } 27 28 // 初始化input 29 // ... 省略初始化input数据代码 ... 30 31 // 准备参数数组 32 long long params[12]; 33 params[0] = (long long)input; 34 params[1] = (long long)output; 35 params[2] = (long long)input_shape; 36 params[3] = (long long)output_shape; 37 params[4] = (long long)paddings; 38 params[5] = (long long)padding_mode; 39 params[6] = (long long)&constant_value; 40 params[7] = (long long)formated_input_shape; 41 params[8] = (long long)formated_output_shape; 42 params[9] = (long long)formated_paddings; 43 params[10] = (long long)in_strides; 44 params[11] = (long long)out_strides; 45 46 // 执行 PadFusion 操作 47 fp_padfusion_p(params); 48 return 0; 49}